#######################################################################
##### This file replicates the first study (twitter) in the paper #####
#######################################################################
rm(list = ls())
library(data.table)
library(dplyr)
library(fixest)
library(lfe)
library(lmtest)
library(MASS)
library(plm)
library(sandwich)
library(stargazer)
library(texreg)

orgtweet <- fread("../datasets/1_twitter_tweets.csv", stringsAsFactors = FALSE)

## calculate variables
class(orgtweet$user_created_date)
orgtweet$accountlength <- as.Date("2015-12-31") - as.Date(orgtweet$user_created_date)
orgtweet$mentionout <- orgtweet$mentiontotal - orgtweet$mentionmember
orgtweet$Imentionout <- ifelse(orgtweet$mentionout >= 1, 1, 0)
orgtweet$Ihashtags <- ifelse(orgtweet$hashtags=="[]", 0, 1)
orgtweet$daily_core_prop <- orgtweet$daily_active_core/orgtweet$daily_active_user

### Appendix Table A8: Summary Statistics for Twitter Analyses
colnames(orgtweet)
# [1:6] is for removing the NA column
sumstats <- rbind(round(summary(orgtweet$religscore)[1:6], 3),
                  round(summary(orgtweet$territory_lag_10)[1:6], 3),
                  round(summary(orgtweet$daystoholi)[1:6], 3),
                  round(summary(orgtweet$followers_count)[1:6], 3),
                  round(summary(orgtweet$favourites_count)[1:6], 3),
                  round(summary(orgtweet$friends_count)[1:6], 3),
                  round(summary(orgtweet$statuses_count)[1:6], 3),
                  round(summary(as.numeric(orgtweet$accountlength))[1:6], 3),
                  round(summary(orgtweet$media)[1:6], 3),
                  round(summary(orgtweet$urls)[1:6], 3),
                  round(summary(orgtweet$Ihashtags)[1:6], 3))
row.names(sumstats) <- c("Religiosity Score", "Territory", "Days to Islamic Holidays", "Followers Count", 
                         "Favorites Count", "Friends Count", "Statuses Count", "Account Length", "Media", "Urls", "Hashtags")
stargazer(sumstats, summary = FALSE)


### transform variables
orgtweet$territory <- orgtweet$territory/100
orgtweet$daystoholi <- orgtweet$daystoholi/10000
orgtweet$accountlength <- orgtweet$accountlength/10000
orgtweet$followers_count <- orgtweet$followers_count/100000
orgtweet$favourites_count <- orgtweet$favourites_count/100000
orgtweet$friends_count <- orgtweet$friends_count/100000
orgtweet$statuses_count <- orgtweet$statuses_count/100000


## Appendix Table A16: Tweet Level Analyses

tweet1 <- felm(religscore~territory_lag_10+daystoholi+followers_count+favourites_count+friends_count+
                 statuses_count+accountlength+media+urls+Ihashtags|0|0|user_id, 
              data = orgtweet)
tweet2 <- felm(Imentionout~territory_lag_10+daystoholi+followers_count+favourites_count+friends_count+
                 statuses_count+accountlength+media+urls+Ihashtags|0|0|user_id, 
              data = orgtweet)
tweet3 <- felm(religscore~Imentionout+territory_lag_10+daystoholi+followers_count+favourites_count+friends_count+
                 statuses_count+accountlength+media+urls+Ihashtags|0|0|user_id, 
              data = orgtweet)
stargazer(list(tweet1, tweet2, tweet3), 
          star.cutoffs=c(0.05, 0.01), no.space=TRUE, label="power_mob_score_tweetlevel")


### Appendix Table A18: Organizational Change Does not Explain the Change in Ideological Positions

aggtweets <- orgtweet %>% 
  group_by(created_date) %>% 
  summarize(meanscore = mean(religscore, na.rm = TRUE),
            territory = first(territory),
            lagterritory = first(territory_lag_10),
            totalevent1 = first(totalevent1),
            daystoholi = first(daystoholi),
            meanfollowers = mean(followers_count, na.rm = TRUE),
            meanfavourites = mean(favourites_count, na.rm = TRUE),
            meanfriends = mean(friends_count, na.rm = TRUE),
            meanstatuses = mean(statuses_count, na.rm = TRUE),
            meanaccountlength = mean(accountlength, na.rm = TRUE),
            meanmedia = mean(media, na.rm = TRUE),
            meanurls = mean(urls, na.rm = TRUE),
            meanhashtags = mean(Ihashtags, na.rm = TRUE),
            daily_active_user = first(daily_active_user),
            daily_core_prop = first(daily_core_prop),
            daily_herf = first(daily_herf),
            ntweets = n())


agg.coord.coreprop1 <- lm(daily_core_prop ~ lagterritory + daystoholi, data = aggtweets)
agg.coord.geo1 <- lm(daily_herf ~ lagterritory + daystoholi, data = aggtweets)

org.relig.coreprop1 <- lm(meanscore ~ daily_core_prop + lagterritory + daystoholi, data = aggtweets)
org.relig.geo1 <- lm(meanscore ~ daily_herf + lagterritory + daystoholi, data = aggtweets)

texreg(list(agg.coord.coreprop1, org.relig.coreprop1, agg.coord.geo1, org.relig.geo1))

######################################################################################################################
### Uncomment this section to rerun the code to reform the tweet level data as a panel. It takes a very long time. ###
######################################################################################################################

# # Generate variables about retweeting
# orgtweet$perip_retweet <- orgtweet$members_retweet - orgtweet$core_retweet
# orgtweet$core_perip <- orgtweet$core_retweet - orgtweet$perip_retweet
# orgtweet$out_retweet <- orgtweet$all_retweet - orgtweet$members_retweet
# orgtweet$memb_out <- orgtweet$members_retweet - orgtweet$out_retweet
# orgtweet$geo_noncore_retweet1 <- orgtweet$geo_retweet - orgtweet$geo_core_retweet1
# orgtweet$geo_core1_noncore1 <- orgtweet$geo_core_retweet1 - orgtweet$geo_noncore_retweet1
# orgtweet$memb_out <- orgtweet$memb_out*10
# orgtweet$geo_core1_noncore1 <- orgtweet$geo_core1_noncore1*10
# 
# # Aggregate data by user and date
# panel <- orgtweet %>%
#   group_by(user_id, created_date) %>%
#   summarize(meanscore = mean(religscore, na.rm = TRUE),
#             territory = first(territory),
#             meanmention = mean(Imentionout, na.rm = TRUE),
#             totalevent1 = first(totalevent1),
#             daystoholi = first(daystoholi),
#             meanfollowers = mean(followers_count, na.rm = TRUE),
#             meanfavourites = mean(favourites_count, na.rm = TRUE),
#             meanfriends = mean(friends_count, na.rm = TRUE),
#             meanstatuses = mean(statuses_count, na.rm = TRUE),
#             meanaccountlength = mean(accountlength, na.rm = TRUE),
#             meanmedia = mean(media, na.rm = TRUE),
#             meanurls = mean(urls, na.rm = TRUE),
#             meanhashtags = mean(Ihashtags, na.rm = TRUE),
#             mean_core_perip = mean(core_perip, na.rm = TRUE),
#             mean_memb_out = mean(memb_out, na.rm = TRUE),
#             mean_geo_core1_noncore1 = mean(geo_core1_noncore1, na.rm = TRUE),
#             mean_members_retweet = mean(members_retweet, na.rm = TRUE),
#             mean_out_retweet = mean(out_retweet, na.rm = TRUE),
#             mean_core_retweet = mean(core_retweet, na.rm = TRUE),
#             mean_perip_retweet = mean(perip_retweet, na.rm = TRUE),
#             mean_geocore1_retweet = mean(geo_core_retweet1, na.rm = TRUE),
#             mean_geoperip1_retweet = mean(geo_noncore_retweet1, na.rm = TRUE),
#             daily_active_user = first(daily_active_user),
#             daily_core_prop = first(daily_core_prop),
#             daily_herf = first(daily_herf),
#             ntweets = n())
# 
# write.csv(panel, "../datasets/1_twitter_panel.csv", row.names = FALSE)

panel <- read.csv("../datasets/1_twitter_panel.csv", stringsAsFactors = FALSE)
ppanel <- pdata.frame(panel)     
ppanel$lagmeanscore <- lag(ppanel$meanscore, k=1, shift = "time")
ppanel$lagmeanmention <- lag(ppanel$meanmention, k=1, shift = "time")
ppanel$lagterritory <- lag(ppanel$territory, k=1, shift = "time")
ppanel$lagtotalevent1 <- lag(ppanel$totalevent1, k=1, shift = "time")

### Main Text Table 4: Group Power, Mobilization, and Religiosity Score
felm1 <- felm(meanscore~lagterritory+lagmeanscore+daystoholi+meanfollowers+meanfavourites+
                meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
              data = ppanel)
felm2 <- felm(meanmention~lagterritory+lagmeanmention+daystoholi+meanfollowers+meanfavourites+
                meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
              data = ppanel)
felm3 <- felm(meanscore~lagterritory+lagmeanscore+meanmention+daystoholi+meanfollowers+meanfavourites+
              meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
            data = ppanel)
stargazer(list(felm1, felm2, felm3), 
          star.cutoffs=c(0.05, 0.01), no.space=TRUE, label="power_mob_score")


### Appendix Table A15: Group Power (Alternative Measure), Mobilization, and Religiosity Score
felm4 <- felm(meanscore~lagtotalevent1+lagmeanscore+daystoholi+meanfollowers+meanfavourites+
                meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
              data = ppanel)
felm5 <- felm(meanmention~lagtotalevent1+lagmeanmention+daystoholi+meanfollowers+meanfavourites+
                meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
              data = ppanel)
felm6 <- felm(meanscore~lagtotalevent1+lagmeanscore+meanmention+daystoholi+meanfollowers+meanfavourites+
                meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
              data = ppanel)
stargazer(list(felm4, felm5, felm6), 
          star.cutoffs=c(0.05, 0.01), no.space=TRUE, label="violence_mob_score")

### Table A17: Retweets by Different Types of Members
mod.retweet2 <- felm(mean_core_perip ~ meanscore+lagterritory+daystoholi+meanfollowers+meanfavourites+
                       meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
                     data = ppanel)
mod.retweet3 <- felm(mean_geo_core1_noncore1 ~ meanscore+lagterritory+daystoholi+meanfollowers+meanfavourites+
                       meanfriends+meanstatuses+meanmedia+meanurls+meanhashtags+meanaccountlength|0|0|user_id, 
                     data = ppanel)


stargazer(list(mod.retweet2, mod.retweet3),
          star.cutoffs=c(0.05, 0.01), no.space=TRUE, label="retweet")


### Appendix Table A5: Confusion Matrix
# tweet ids have been encoded
validation <- read.csv("../datasets/1_twitter_validation.csv", 
                    stringsAsFactors = FALSE)
validation$machinecode <- ifelse(validation$religscore>0, 1, ifelse(validation$religscore<0, -1, 0))

table(validation$machinecode, validation$handcode)

